|IS 608 KNOWLEDGE AND VISUAL ANALYTICS - FINAL PROJECT | Data Analytics
Aim: The goal of this project is to:
Methodology:
The project would lay more emphasis on the explanatory techniques. It will be used in making data presentation to the viewers in a more succinct way. I therefore plan to use the R programing language to explore and analysis the dataset.
The dataset to be used is the World Health Nutrition and Population Statistics from year 2010 to 2016 . This can be obtained from http://databank.worldbank.org/data/reports.aspx?source=health-nutrition-and-population-statistics#advancedDownloadOptions .
Load this libraries and dataset and lets get to work!
suppressMessages(library(knitr))
suppressMessages(library(plyr))
suppressMessages(library(tidyr))
suppressMessages(library(dplyr))
suppressMessages(library(ggplot2))
suppressMessages(library(plotly))
suppressMessages(library(sqldf))
suppressMessages(library(MASS))
suppressMessages(library(reshape2))
suppressMessages(library(Amelia))
suppressMessages(library(mice))
suppressPackageStartupMessages(library(googleVis))
## Creating a generic function for 'toJSON' from package 'jsonlite' in package 'googleVis'
suppressMessages(library(ggdendro))
df <- read.csv("WorldHealth.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
kable(head(df[1:10, ]))
| Year_Code | Country_Name | Country_Code | Adults_15_living_HIV | Adults_Children_0_14_15_living_HIV | AIDS_estimated_deaths_UNAIDS | Adults_children_0_14_15_newly_infected_HIV | Adults_15_newly_infected_HIV | Children_0_14_living_with_HIV | Children_orphaned_by_HIV_AIDS | Children_0_14_newly_infected_HIV | Incidence_tuberculosis_per_100000 | Labor_force_total | Mortality_traffic_injury_100K | Population_female | Population_male | Population_total | Malaria_cases_reported | Suicide_mortality_per_100K | Tuberculosis_death_per_100K | Tuberculosis_case_detection | Tuberculosis_treatment_success_NewCases |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| YR2010 | Afghanistan | AFG | 4300 | 4500 | 500 | 1000 | 1000 | 200 | 3000 | 100 | 189.0 | 7707349 | 15.3 | 14005473 | 14797694 | 28803167 | 69397 | 5.1 | 41.00 | 53 | 86 |
| YR2010 | Albania | ALB | 1000 | 1000 | 100 | 200 | 200 | NA | NA | NA | 17.0 | 1216574 | 15.4 | 1451422 | 1461599 | 2913021 | NA | 5.3 | 0.20 | 88 | 89 |
| YR2010 | Algeria | DZA | 9000 | 9200 | 500 | 1000 | 1000 | 500 | 2100 | 100 | 77.0 | 11115092 | 24.7 | 17887239 | 18230398 | 36117637 | 1 | 3.4 | 8.50 | 80 | 89 |
| YR2010 | American Samoa | ASM | NA | NA | NA | NA | NA | NA | NA | NA | 8.3 | NA | NA | NA | NA | 55637 | NA | NA | 0.68 | 87 | 100 |
| YR2010 | Andorra | AND | NA | NA | NA | NA | NA | NA | NA | NA | 9.5 | NA | NA | NA | NA | 84449 | NA | NA | 0.78 | 87 | 86 |
| YR2010 | Angola | AGO | 190000 | 210000 | 9400 | 25000 | 21000 | 18000 | 94000 | 3800 | 384.0 | 8394741 | 20.8 | 11936016 | 11433115 | 23369131 | 1682870 | 20.6 | 58.00 | 55 | 48 |
lat_long <- read.csv("Countries_long_lat2.csv", header = TRUE, sep = ",")
colnames(lat_long) <- c("Country_Name", "Country_Code", "Latitude", "Longtitude")
kable(head(lat_long))
| Country_Name | Country_Code | Latitude | Longtitude |
|---|---|---|---|
| Albania | ALB | 41.0000 | 20.0000 |
| Algeria | DZA | 28.0000 | 3.0000 |
| American Samoa | ASM | -14.3333 | -170.0000 |
| Andorra | AND | 42.5000 | 1.6000 |
| Angola | AGO | -12.5000 | 18.5000 |
| Anguilla | AIA | 18.2500 | -63.1667 |
options(warn = -1)
df2 <- merge(df, lat_long, by.x = "Country_Name", by.y = "Country_Name", all = TRUE)
head(df2)
na_count_before <-sapply(df, function(a) sum(is.na(a)))
na_count_after <-sapply(df2, function(b) sum(is.na(b)))
kable(na_count_before)
| Year_Code | 0 |
| Country_Name | 0 |
| Country_Code | 0 |
| Adults_15_living_HIV | 511 |
| Adults_Children_0_14_15_living_HIV | 516 |
| AIDS_estimated_deaths_UNAIDS | 540 |
| Adults_children_0_14_15_newly_infected_HIV | 576 |
| Adults_15_newly_infected_HIV | 571 |
| Children_0_14_living_with_HIV | 684 |
| Children_orphaned_by_HIV_AIDS | 684 |
| Children_0_14_newly_infected_HIV | 678 |
| Incidence_tuberculosis_per_100000 | 55 |
| Labor_force_total | 160 |
| Mortality_traffic_injury_100K | 916 |
| Population_female | 118 |
| Population_male | 118 |
| Population_total | 4 |
| Malaria_cases_reported | 703 |
| Suicide_mortality_per_100K | 916 |
| Tuberculosis_death_per_100K | 55 |
| Tuberculosis_case_detection | 101 |
| Tuberculosis_treatment_success_NewCases | 345 |
kable(na_count_after)
| Country_Name | 0 |
| Year_Code | 68 |
| Country_Code.x | 68 |
| Adults_15_living_HIV | 579 |
| Adults_Children_0_14_15_living_HIV | 584 |
| AIDS_estimated_deaths_UNAIDS | 608 |
| Adults_children_0_14_15_newly_infected_HIV | 644 |
| Adults_15_newly_infected_HIV | 639 |
| Children_0_14_living_with_HIV | 752 |
| Children_orphaned_by_HIV_AIDS | 752 |
| Children_0_14_newly_infected_HIV | 746 |
| Incidence_tuberculosis_per_100000 | 123 |
| Labor_force_total | 228 |
| Mortality_traffic_injury_100K | 984 |
| Population_female | 186 |
| Population_male | 186 |
| Population_total | 72 |
| Malaria_cases_reported | 771 |
| Suicide_mortality_per_100K | 984 |
| Tuberculosis_death_per_100K | 123 |
| Tuberculosis_case_detection | 169 |
| Tuberculosis_treatment_success_NewCases | 413 |
| Country_Code.y | 246 |
| Latitude | 246 |
| Longtitude | 246 |
#Covet the columns to Numeric
df2[, c(4:22)] <- sapply(df2[, c(4:22)], as.numeric)
glimpse(df2)
## Observations: 1,364
## Variables: 25
## $ Country_Name <chr> "Afghanistan", "Afg...
## $ Year_Code <chr> "YR2015", "YR2013",...
## $ Country_Code.x <chr> "AFG", "AFG", "AFG"...
## $ Adults_15_living_HIV <dbl> 6700, 5600, 4700, 5...
## $ Adults_Children_0_14_15_living_HIV <dbl> 6900, 5800, 4900, 5...
## $ AIDS_estimated_deaths_UNAIDS <dbl> 500, 500, 500, 500,...
## $ Adults_children_0_14_15_newly_infected_HIV <dbl> 1000, 1000, 1000, 1...
## $ Adults_15_newly_infected_HIV <dbl> 1000, 1000, 1000, 1...
## $ Children_0_14_living_with_HIV <dbl> 500, 200, 200, 200,...
## $ Children_orphaned_by_HIV_AIDS <dbl> 4200, 3700, 3200, 3...
## $ Children_0_14_newly_infected_HIV <dbl> 100, 100, 100, 100,...
## $ Incidence_tuberculosis_per_100000 <dbl> 189.0, 189.0, 189.0...
## $ Labor_force_total <dbl> 9825616, 8916157, 8...
## $ Mortality_traffic_injury_100K <dbl> 15.2, NA, NA, NA, N...
## $ Population_female <dbl> 16346869, 15398276,...
## $ Population_male <dbl> 17389625, 16333412,...
## $ Population_total <dbl> 33736494, 31731688,...
## $ Malaria_cases_reported <dbl> 86895, 39263, 77549...
## $ Suicide_mortality_per_100K <dbl> 5.5, NA, NA, NA, NA...
## $ Tuberculosis_death_per_100K <dbl> 37.00, 41.00, 42.00...
## $ Tuberculosis_case_detection <dbl> 58, 53, 51, 51, 53,...
## $ Tuberculosis_treatment_success_NewCases <dbl> NA, 88, 88, 88, 87,...
## $ Country_Code.y <fctr> AFG, AFG, AFG, AFG...
## $ Latitude <dbl> 33.0000, 33.0000, 3...
## $ Longtitude <dbl> 65.0, 65.0, 65.0, 6...
Merging column lonitude and Latitude together for a better coordinate to be in maps (googlevis)
df2$Lat_Long = paste(df2$Latitude, df2$Longtitude, sep=":")
kable(head(df2))
| Country_Name | Year_Code | Country_Code.x | Adults_15_living_HIV | Adults_Children_0_14_15_living_HIV | AIDS_estimated_deaths_UNAIDS | Adults_children_0_14_15_newly_infected_HIV | Adults_15_newly_infected_HIV | Children_0_14_living_with_HIV | Children_orphaned_by_HIV_AIDS | Children_0_14_newly_infected_HIV | Incidence_tuberculosis_per_100000 | Labor_force_total | Mortality_traffic_injury_100K | Population_female | Population_male | Population_total | Malaria_cases_reported | Suicide_mortality_per_100K | Tuberculosis_death_per_100K | Tuberculosis_case_detection | Tuberculosis_treatment_success_NewCases | Country_Code.y | Latitude | Longtitude | Lat_Long |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Afghanistan | YR2015 | AFG | 6700 | 6900 | 500 | 1000 | 1000 | 500 | 4200 | 100 | 189 | 9825616 | 15.2 | 16346869 | 17389625 | 33736494 | 86895 | 5.5 | 37 | 58 | NA | AFG | 33 | 65 | 33:65 |
| Afghanistan | YR2013 | AFG | 5600 | 5800 | 500 | 1000 | 1000 | 200 | 3700 | 100 | 189 | 8916157 | NA | 15398276 | 16333412 | 31731688 | 39263 | NA | 41 | 53 | 88 | AFG | 33 | 65 | 33:65 |
| Afghanistan | YR2011 | AFG | 4700 | 4900 | 500 | 1000 | 1000 | 200 | 3200 | 100 | 189 | 8050184 | NA | 14444001 | 15264598 | 29708599 | 77549 | NA | 42 | 51 | 88 | AFG | 33 | 65 | 33:65 |
| Afghanistan | YR2012 | AFG | 5200 | 5300 | 500 | 1000 | 1000 | 200 | 3500 | 100 | 189 | 8458402 | NA | 14912657 | 15784301 | 30696958 | 54840 | NA | 42 | 51 | 88 | AFG | 33 | 65 | 33:65 |
| Afghanistan | YR2014 | AFG | 6200 | 6400 | 500 | 1000 | 1000 | 500 | 4000 | 100 | 189 | 9397624 | NA | 15881092 | 16876928 | 32758020 | 145282 | NA | 41 | 53 | 87 | AFG | 33 | 65 | 33:65 |
| Afghanistan | YR2010 | AFG | 4300 | 4500 | 500 | 1000 | 1000 | 200 | 3000 | 100 | 189 | 7707349 | 15.3 | 14005473 | 14797694 | 28803167 | 69397 | 5.1 | 41 | 53 | 86 | AFG | 33 | 65 | 33:65 |
missmap(df2, legend = TRUE, col = c("wheat","darkred", col=c('yellow', 'darkgreen')), main ="Plot Showing The Missing Values Per Observation",
y.cex = 0.8, x.cex = 0.8,csvar = NULL, tsvar =NULL, rank.order = TRUE)
mice_missing_val <- mice(df2, m=1, method='cart', printFlag=FALSE, where = is.na(df2))
mice_missing_val_comp <- complete(mice_missing_val, action='long', include=FALSE)
df2_cleaned <- na.omit(mice_missing_val_comp)
df2_cleaned <- df2_cleaned[, -c(1:2)]
kable(head(df2_cleaned))
| Country_Name | Year_Code | Country_Code.x | Adults_15_living_HIV | Adults_Children_0_14_15_living_HIV | AIDS_estimated_deaths_UNAIDS | Adults_children_0_14_15_newly_infected_HIV | Adults_15_newly_infected_HIV | Children_0_14_living_with_HIV | Children_orphaned_by_HIV_AIDS | Children_0_14_newly_infected_HIV | Incidence_tuberculosis_per_100000 | Labor_force_total | Mortality_traffic_injury_100K | Population_female | Population_male | Population_total | Malaria_cases_reported | Suicide_mortality_per_100K | Tuberculosis_death_per_100K | Tuberculosis_case_detection | Tuberculosis_treatment_success_NewCases | Country_Code.y | Latitude | Longtitude | Lat_Long |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Afghanistan | YR2015 | AFG | 6700 | 6900 | 500 | 1000 | 1000 | 500 | 4200 | 100 | 189 | 9825616 | 15.2 | 16346869 | 17389625 | 33736494 | 86895 | 5.5 | 37 | 58 | 87 | AFG | 33 | 65 | 33:65 |
| Afghanistan | YR2013 | AFG | 5600 | 5800 | 500 | 1000 | 1000 | 200 | 3700 | 100 | 189 | 8916157 | 15.3 | 15398276 | 16333412 | 31731688 | 39263 | 5.2 | 41 | 53 | 88 | AFG | 33 | 65 | 33:65 |
| Afghanistan | YR2011 | AFG | 4700 | 4900 | 500 | 1000 | 1000 | 200 | 3200 | 100 | 189 | 8050184 | 26.3 | 14444001 | 15264598 | 29708599 | 77549 | 5.9 | 42 | 51 | 88 | AFG | 33 | 65 | 33:65 |
| Afghanistan | YR2012 | AFG | 5200 | 5300 | 500 | 1000 | 1000 | 200 | 3500 | 100 | 189 | 8458402 | 23.7 | 14912657 | 15784301 | 30696958 | 54840 | 5.2 | 42 | 51 | 88 | AFG | 33 | 65 | 33:65 |
| Afghanistan | YR2014 | AFG | 6200 | 6400 | 500 | 1000 | 1000 | 500 | 4000 | 100 | 189 | 9397624 | 28.4 | 15881092 | 16876928 | 32758020 | 145282 | 5.1 | 41 | 53 | 87 | AFG | 33 | 65 | 33:65 |
| Afghanistan | YR2010 | AFG | 4300 | 4500 | 500 | 1000 | 1000 | 200 | 3000 | 100 | 189 | 7707349 | 15.3 | 14005473 | 14797694 | 28803167 | 69397 | 5.1 | 41 | 53 | 86 | AFG | 33 | 65 | 33:65 |
na_count_after_cleaning <-sapply(df2_cleaned, function(c) sum(is.na(c)))
kable(na_count_after_cleaning)
| Country_Name | 0 |
| Year_Code | 0 |
| Country_Code.x | 0 |
| Adults_15_living_HIV | 0 |
| Adults_Children_0_14_15_living_HIV | 0 |
| AIDS_estimated_deaths_UNAIDS | 0 |
| Adults_children_0_14_15_newly_infected_HIV | 0 |
| Adults_15_newly_infected_HIV | 0 |
| Children_0_14_living_with_HIV | 0 |
| Children_orphaned_by_HIV_AIDS | 0 |
| Children_0_14_newly_infected_HIV | 0 |
| Incidence_tuberculosis_per_100000 | 0 |
| Labor_force_total | 0 |
| Mortality_traffic_injury_100K | 0 |
| Population_female | 0 |
| Population_male | 0 |
| Population_total | 0 |
| Malaria_cases_reported | 0 |
| Suicide_mortality_per_100K | 0 |
| Tuberculosis_death_per_100K | 0 |
| Tuberculosis_case_detection | 0 |
| Tuberculosis_treatment_success_NewCases | 0 |
| Country_Code.y | 0 |
| Latitude | 0 |
| Longtitude | 0 |
| Lat_Long | 0 |
kable(summary(df2_cleaned[, 4:22]))
| Adults_15_living_HIV | Adults_Children_0_14_15_living_HIV | AIDS_estimated_deaths_UNAIDS | Adults_children_0_14_15_newly_infected_HIV | Adults_15_newly_infected_HIV | Children_0_14_living_with_HIV | Children_orphaned_by_HIV_AIDS | Children_0_14_newly_infected_HIV | Incidence_tuberculosis_per_100000 | Labor_force_total | Mortality_traffic_injury_100K | Population_female | Population_male | Population_total | Malaria_cases_reported | Suicide_mortality_per_100K | Tuberculosis_death_per_100K | Tuberculosis_case_detection | Tuberculosis_treatment_success_NewCases | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Min. : 100 | Min. : 100 | Min. : 100 | Min. : 100 | Min. : 100 | Min. : 100 | Min. : 500 | Min. : 100 | Min. : 0.0 | Min. :1.389e+05 | Min. : 2.40 | Min. :1.454e+05 | Min. :1.341e+05 | Min. :2.796e+05 | Min. : 0 | Min. : 0.000 | Min. : 0.11 | Min. : 15.00 | Min. : 0.00 | |
| 1st Qu.: 8300 | 1st Qu.: 8700 | 1st Qu.: 275 | 1st Qu.: 1000 | 1st Qu.: 1000 | 1st Qu.: 500 | 1st Qu.: 3500 | 1st Qu.: 100 | 1st Qu.: 28.0 | 1st Qu.:1.918e+06 | 1st Qu.:13.65 | 1st Qu.:2.199e+06 | 1st Qu.:2.194e+06 | 1st Qu.:4.350e+06 | 1st Qu.: 411 | 1st Qu.: 5.825 | 1st Qu.: 2.00 | 1st Qu.: 58.00 | 1st Qu.: 75.00 | |
| Median : 37000 | Median : 40500 | Median : 1500 | Median : 2400 | Median : 2000 | Median : 1750 | Median : 18500 | Median : 500 | Median : 100.0 | Median :5.089e+06 | Median :20.00 | Median :5.661e+06 | Median :5.620e+06 | Median :1.131e+07 | Median : 17549 | Median : 8.400 | Median : 9.30 | Median : 76.00 | Median : 83.00 | |
| Mean : 514638 | Mean : 555964 | Mean : 20816 | Mean : 32101 | Mean : 28040 | Mean : 43017 | Mean : 318681 | Mean : 4333 | Mean : 165.4 | Mean :4.363e+07 | Mean :19.90 | Mean :4.838e+07 | Mean :4.923e+07 | Mean :9.763e+07 | Mean : 485136 | Mean :10.275 | Mean :19.00 | Mean : 70.25 | Mean : 79.09 | |
| 3rd Qu.: 170000 | 3rd Qu.: 180000 | 3rd Qu.: 6200 | 3rd Qu.: 9950 | 3rd Qu.: 9225 | 3rd Qu.: 12750 | 3rd Qu.: 90000 | 3rd Qu.: 1000 | 3rd Qu.: 235.0 | 3rd Qu.:1.233e+07 | 3rd Qu.:27.00 | 3rd Qu.:1.476e+07 | 3rd Qu.:1.574e+07 | 3rd Qu.:3.030e+07 | 3rd Qu.: 277235 | 3rd Qu.:12.575 | 3rd Qu.:30.00 | 3rd Qu.: 87.00 | 3rd Qu.: 87.00 | |
| Max. :33800000 | Max. :36100000 | Max. :1500000 | Max. :2200000 | Max. :1900000 | Max. :2600000 | Max. :19700000 | Max. :300000 | Max. :1246.0 | Max. :3.406e+09 | Max. :45.40 | Max. :3.644e+09 | Max. :3.708e+09 | Max. :7.355e+09 | Max. :11627473 | Max. :35.300 | Max. :99.00 | Max. :110.00 | Max. :100.00 |
Average Kids Orphaned By HIV/AIDS
All_Cen <- sqldf("select Country_Name, Year_Code, (Children_orphaned_by_HIV_AIDS/Population_total)*100 as 'Average_Orphaned_byHIV' from df2_cleaned where Average_Orphaned_byHIV >= 2 order by Average_Orphaned_byHIV desc limit 50" )
# Boxplot
g <- ggplot(All_Cen, aes(Year_Code, Average_Orphaned_byHIV)) + geom_boxplot(varwidth=T, fill="plum") +
labs(title="Box plot",
subtitle="Average_Orphaned_byHIV grouped by Year",
caption="Source: The World Bank Data Bank",
x="Year",
y="Avg. Children Orphaned By HIV/AIDS")
ggplotly(g)
theme_set(theme_bw())
# Draw plot
p1 <- ggplot(All_Cen, aes(x=Country_Name, y=Average_Orphaned_byHIV)) +
geom_bar(stat="identity", width=.5, fill="tomato3") +
labs(title="Top 50 Countries Where Children Orphaned By HIV/AIDS Per Pop (>= 2)",
subtitle="Country Vs Avg. Orphaned By HIV/AIDS ",
caption="source: The World Bank Data Bank") +
theme(axis.text.x = element_text(angle=65, vjust=0.6))
ggplotly(p1)
# Histogram on a Continuous (Numeric) Variable
g <- ggplot(All_Cen, aes(Average_Orphaned_byHIV)) + scale_fill_brewer(palette = "Spectral") + geom_histogram(aes(fill=Year_Code),
bins=20,
col="black",
size=.05) + # change number of bins
labs(title="Top 50 Countries Where Children Orphaned By HIV/AIDS Per Pop (>= 2)",
subtitle="Average Children Orpaned By HIV/AIDS")
ggplotly(g)
ggplotly(ggplot(All_Cen, aes(x=Average_Orphaned_byHIV, y=Country_Name, fill=Year_Code)) +
geom_point(colour="black", size=.2, alpha=.4) +
scale_fill_brewer(palette="Blues", breaks=rev(levels(All_Cen$Year_Code))))
p <- ggplot(data =All_Cen, aes(x = Country_Name, y =Average_Orphaned_byHIV ))+ geom_boxplot(aes(color = Year_Code)) + facet_wrap(~Year_Code) + ggtitle("Top 50 Countries Where Children Orphaned By HIV/AIDS Per Pop (>= 2)") +
xlab("Countries") + ylab("Average_Orphaned_byHIV")
ggplotly(p)
INCIDENCE RATES
“Incidence rate or person-time rate is a measure of incidence that incorporates time directly into the denominator. A person-time rate is generally calculated from a long-term cohort follow-up study, wherein enrollees are followed over time and the occurrence of new cases of disease is documented.”" —CDC
\(Incidence\quad Rate\quad =\quad \frac { Number\quad of\quad newcases\quad of\quad disease\quad or\quad injury\quad during\quad specified\quad period }{ Time\quad each\quad person\quad was\quad observed,\quad totaled\quad for\quad all\quad persons }\)
Incident_Hiv_All_Ages <- sqldf("select Population_total, Lat_Long,Country_Name, Year_Code, (Adults_children_0_14_15_newly_infected_HIV/Population_total)*100 as 'HIV_Incident_Rate' from df2_cleaned where HIV_Incident_Rate >= 0.5 order by HIV_Incident_Rate desc limit 30" )
kable(head(Incident_Hiv_All_Ages))
| Population_total | Lat_Long | Country_Name | Year_Code | HIV_Incident_Rate |
|---|---|---|---|---|
| 2117361 | -29.5:28.5 | Lesotho | YR2013 | 1.086258 |
| 1202843 | -26.5:31.5 | Swaziland | YR2010 | 1.080773 |
| 2040551 | -29.5:28.5 | Lesotho | YR2010 | 1.078140 |
| 2089928 | -29.5:28.5 | Lesotho | YR2012 | 1.052668 |
| 2145785 | -29.5:28.5 | Lesotho | YR2014 | 1.025266 |
| 2064166 | -29.5:28.5 | Lesotho | YR2011 | 1.017360 |
Incident_Tuber_All_Ages <- sqldf("select Population_total, Lat_Long,Country_Name, Year_Code, (Tuberculosis_case_detection/Population_total)*100 as 'Tuberculocisis_Incident_Rate' from df2_cleaned where Tuberculocisis_Incident_Rate >= 0.01 order by Tuberculocisis_Incident_Rate, Year_Code desc limit 30" )
kable(head(Incident_Tuber_All_Ages))
| Population_total | Lat_Long | Country_Name | Year_Code | Tuberculocisis_Incident_Rate |
|---|---|---|---|---|
| 768514 | 5:-59 | Guyana | YR2015 | 0.0104097 |
| 763393 | 5:-59 | Guyana | YR2014 | 0.0104795 |
| 758081 | 5:-59 | Guyana | YR2013 | 0.0105530 |
| 753091 | 5:-59 | Guyana | YR2012 | 0.0106229 |
| 749100 | 5:-59 | Guyana | YR2011 | 0.0106795 |
| 746556 | 5:-59 | Guyana | YR2010 | 0.0107159 |
Hiv_Inc_Plot <- ggplot(data =Incident_Hiv_All_Ages, aes(y = Country_Name, x =HIV_Incident_Rate ))+ geom_boxplot(aes(color = Year_Code)) + facet_wrap(~Year_Code) + ggtitle("Top 30 Countries With HIV/AIDS Incident Rate (>= 0.5)") +
xlab("Countries") + ylab("HIV Incident Rates")
ggplotly(Hiv_Inc_Plot)
Hiv_Inc_Map <- gvisGeoChart(Incident_Hiv_All_Ages, locationvar ="Lat_Long", hovervar ="Country_Name",sizevar = "HIV_Incident_Rate", colorvar = "Population_total",
options=list(displayMode="Markers",
colorAxis="{colors:['purple', 'red', 'orange', 'grey', 'pink']}",
backgroundColor="lightblue"), chartid="EQ")
plot(Hiv_Inc_Map )
## starting httpd help server ... done
ggplotly(ggplot(Incident_Tuber_All_Ages, aes(x=Tuberculocisis_Incident_Rate, y=Country_Name, fill=Year_Code)) +
geom_point(colour="black", size=.2, alpha=.4) +
scale_fill_brewer(palette="Blues", breaks=rev(levels(All_Cen$Year_Code))))
p4 <- ggplot(data =Incident_Tuber_All_Ages, aes(y = Country_Name, x =Tuberculocisis_Incident_Rate ))+ geom_boxplot(aes(color = Year_Code)) + facet_wrap(~Year_Code) + ggtitle("Top Countries with High Tuberculosis Incidence Rate") +
ylab("Countries") + xlab("Year")
ggplotly(p4)
Tub_Inc_Map <- gvisGeoChart(Incident_Tuber_All_Ages, locationvar ="Lat_Long", hovervar ="Country_Name",sizevar = "Tuberculocisis_Incident_Rate", colorvar = "Population_total",
options=list(displayMode="Markers",
colorAxis="{colors:['purple', 'red', 'orange', 'grey', 'pink']}",
backgroundColor="lightblue"), chartid="ER")
plot(Tub_Inc_Map )